{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "3a3b66a1-55b2-4e59-a16b-d4cad3e799f2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set up the environment variables\n",
    "import os\n",
    "import sys\n",
    "\n",
    "# Put the parent directory first on the import path -- presumably where\n",
    "# config.py lives (TODO confirm)\n",
    "sys.path.insert(0, os.path.abspath(os.pardir))\n",
    "\n",
    "from config import set_environment\n",
    "\n",
    "set_environment()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "104e8a15-7a55-49e7-adf2-0ca0c932b5c9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "from git import Repo\n",
    "\n",
    "from langchain_community.document_loaders.generic import GenericLoader\n",
    "from langchain_community.document_loaders.parsers import LanguageParser\n",
    "from langchain_text_splitters import Language, RecursiveCharacterTextSplitter\n",
    "\n",
    "# Clone the book repository from GitHub. Reuse an existing clone so this cell\n",
    "# can be re-run (clone_from fails when the target directory is non-empty).\n",
    "repo_path = os.path.expanduser(\"~/Downloads/generative_ai_with_langchain\")\n",
    "if os.path.isdir(os.path.join(repo_path, \".git\")):\n",
    "    repo = Repo(repo_path)\n",
    "else:\n",
    "    repo = Repo.clone_from(\"https://github.com/benman1/generative_ai_with_langchain\", to_path=repo_path)\n",
    "\n",
    "# Load the Python code (every .py file under the clone) using LanguageParser\n",
    "loader = GenericLoader.from_filesystem(\n",
    "    repo_path,\n",
    "    glob=\"**/*\",\n",
    "    suffixes=[\".py\"],\n",
    "    parser=LanguageParser(language=Language.PYTHON, parser_threshold=500),\n",
    ")\n",
    "documents = loader.load()\n",
    "\n",
    "# NOTE(review): chunk_size=50 looks very small for source code -- confirm it is intended\n",
    "python_splitter = RecursiveCharacterTextSplitter.from_language(\n",
    "    language=Language.PYTHON, chunk_size=50, chunk_overlap=0\n",
    ")\n",
    "# Split the documents into chunks for embedding and vector storage\n",
    "texts = python_splitter.split_documents(documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "26ed5e12-c75d-45f1-9fb5-b2b5bff12ad3",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.prompts import ChatPromptTemplate\n",
    "from langchain_community.vectorstores import Chroma\n",
    "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
    "from langchain.chains.combine_documents import create_stuff_documents_chain\n",
    "from langchain.chains.retrieval import create_retrieval_chain\n",
    "\n",
    "# Embed the code chunks and index them in a Chroma vector store\n",
    "embeddings = OpenAIEmbeddings()\n",
    "db = Chroma.from_documents(texts, embeddings)\n",
    "# Retrieve with the \"mmr\" search type, k=8 per query\n",
    "retriever = db.as_retriever(search_type=\"mmr\", search_kwargs={\"k\": 8})\n",
    "\n",
    "# Build the Q&A chain over code: the retrieved context goes into the system\n",
    "# message, followed by a chat-history placeholder and the user's question\n",
    "prompt = ChatPromptTemplate.from_messages(\n",
    "    [\n",
    "        (\"system\", \"Answer the user's questions based on the below context:\\n\\n{context}\"),\n",
    "        (\"placeholder\", \"{chat_history}\"),\n",
    "        (\"user\", \"{input}\"),\n",
    "    ]\n",
    ")\n",
    "llm = ChatOpenAI()\n",
    "document_chain = create_stuff_documents_chain(llm, prompt)\n",
    "qa = create_retrieval_chain(retriever, document_chain)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "8cf3b80a-2e30-4021-be10-184b9d85878b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The examples in the code related to software development include the class `PythonDeveloper` and the method `PythonDeveloper(llm_chain=software_llm)`.\n"
     ]
    }
   ],
   "source": [
    "# Send a question from the book through the retrieval chain and print only\n",
    "# the generated answer\n",
    "question = \"What examples are in the code related to software development?\"\n",
    "result = qa.invoke(dict(input=question))\n",
    "answer = result[\"answer\"]\n",
    "print(answer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f403f3fc-ee79-439c-ab10-d857fd12201f",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
